package org.rainwalk.library.service.impl;

import cn.hutool.core.io.FileUtil;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.multipdf.Splitter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.rainwalk.library.model.domain.BooleanResult;
import org.rainwalk.library.model.domain.SubFile;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Service;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * File service for PDF documents: checks the page count before parsing and splits
 * a PDF into separate files, keeping only the parts that contain text.
 *
 * <p>Properties under the {@code upload-file.pdf} prefix are bound onto this bean
 * (currently {@code maxPage}, through the Lombok-generated setter).
 *
 * @author 趁雨行 2021-07-16 23:35:06
 */
@Slf4j
@Service
@ConfigurationProperties(prefix = "upload-file.pdf")
public class FileService4PdfImpl extends FileServiceAbstract {

    /** Maximum number of pages a PDF may have to be accepted by {@link #canParse(File)}. */
    @Setter
    private int maxPage;

    /**
     * Runs the parent check first and, if it passes, verifies that the PDF can be
     * loaded and has at most {@code maxPage} pages.
     *
     * @param file the PDF file to check
     * @return the parent's result when the parent check fails; a failed result when
     *         the PDF cannot be loaded; otherwise a result whose flag tells whether
     *         the page count is within {@code maxPage}
     */
    @Override
    public BooleanResult<String> canParse(File file) {
        BooleanResult<String> superResult = super.canParse(file);
        if (!superResult.isBool()) {
            return superResult;
        }
        // Check the page count. try-with-resources closes the document so that the
        // file handle and buffers are released on every path.
        try (PDDocument source = Loader.loadPDF(file)) {
            return new BooleanResult<>(maxPage >= source.getNumberOfPages(), String.format("pdf页数不可超过%d页！", maxPage));
        } catch (IOException e) {
            // Pass the exception to the logger so the root cause is not lost.
            log.warn("加载pdf文件失败，文件路径：{}", file.getAbsolutePath(), e);
            return new BooleanResult<>(false, "加载pdf文件失败");
        }
    }

    /**
     * Splits the PDF with a default {@link Splitter} and saves every resulting part
     * that contains text as {@code <n>.pdf} inside a directory next to the main file,
     * where {@code n} is the 1-based position of the part.
     *
     * @param file the PDF file to split
     * @return the saved sub-files, in order; parts without text are skipped
     * @throws IOException if the PDF cannot be loaded, the sub-file directory cannot
     *                     be created, or a part cannot be read or saved
     */
    @Override
    public List<SubFile> splitFile(File file) throws IOException {
        log.info("拆分pdf文件-----pdf文件服务-----入参文件：{}", file);
        List<SubFile> subFiles = new ArrayList<>();
        try (PDDocument source = Loader.loadPDF(file)) {
            List<PDDocument> documents = new Splitter().split(source);
            try {
                // Create the directory that receives the sub-files.
                File directory = subFileDirectory(file);
                if (!directory.isDirectory() && !directory.mkdirs()) {
                    throw new IOException("Cannot create sub-file directory: " + directory.getAbsolutePath());
                }
                // One stripper is enough; it is reused for every part.
                PDFTextStripper stripper = new PDFTextStripper();
                int i = 0;
                for (PDDocument document : documents) {
                    i++;
                    // Parts without any text are not worth saving.
                    if (!hasText(stripper, document)) {
                        log.info("pdf拆分，第{}页不存在文字，不需要拆分", i);
                        continue;
                    }
                    File child = new File(directory, i + ".pdf");
                    document.save(child);
                    SubFile subFile = new SubFile();
                    subFile.setFile(child);
                    subFile.setSubNo(i + "");
                    subFiles.add(subFile);
                }
            } finally {
                // The split documents share resources with the source, so they are
                // closed only after all of them were saved and before the source
                // itself is closed by the enclosing try-with-resources.
                closeAll(documents);
            }
        }
        return subFiles;
    }

    /**
     * Removes resources that splitting the file may have produced, i.e. the
     * sub-file directory and everything in it.
     *
     * @param file     the main file
     * @param subFiles the sub-files produced by the split; not used by this implementation
     */
    @Override
    public void cleanResourceOnSplitFile(File file, List<SubFile> subFiles) {
        File subFileDirectory = subFileDirectory(file);
        if (subFileDirectory.equals(file)) {
            // No distinct directory could be derived from the name (e.g. no
            // extension); deleting it would delete the main file itself.
            return;
        }
        FileUtil.del(subFileDirectory);
    }

    /**
     * Derives the sub-file directory for a main file: a sibling whose name is the
     * main file's name with the trailing {@code .<suffix>} removed. When stripping
     * would leave an empty name, the full name is kept, so the result then equals
     * the main file's own path.
     */
    private static File subFileDirectory(File file) {
        String name = file.getName();
        String dotSuffix = "." + FileUtil.getSuffix(file);
        // Plain string handling instead of a regex: the suffix is matched literally
        // and only at the end of the name.
        boolean strippable = name.endsWith(dotSuffix) && name.length() > dotSuffix.length();
        String baseName = strippable ? name.substring(0, name.length() - dotSuffix.length()) : name;
        return new File(file.getParent(), baseName);
    }

    /**
     * Tells whether the extracted text of the document contains anything besides
     * the characters in the filter class (tab, line feed, carriage return and the
     * last character of the class).
     * NOTE(review): the last character is U+FFFD as it appears in this source; it
     * may be a mis-encoded original character - confirm.
     */
    private static boolean hasText(PDFTextStripper stripper, PDDocument document) throws IOException {
        String content = stripper.getText(document);
        return !content.replaceAll("[\t\n\r�]", "").isEmpty();
    }

    /** Closes every document, logging instead of propagating close failures. */
    private static void closeAll(List<PDDocument> documents) {
        for (PDDocument document : documents) {
            try {
                document.close();
            } catch (IOException e) {
                log.warn("Failed to close split pdf document", e);
            }
        }
    }
}
